#############################################################
### HEADER
# Creators: Raha Imanirad & Maximilian J. Pany
# Course: Gov 2001 / Stat E-200
# Replication paper code
# Article:
#    Blowing smoke

# Replicated Article: 
#    Adoption and compliance in second-hand smoking bans: a global econometric analysis
#    Richard Perkins, Eric Neumayer
#    Int J Public Health (2014) 59:859–866

# Date: 05/04/2015
# R version: 3.1.3
### HEADER END
#############################################################

# cleaning house
rm(list=ls())
graphics.off()

# library
library(foreign)
library(plyr)
library(pastecs)
library(ggplot2)
library(RColorBrewer)
library(gridExtra)
library(xtable)

#############################################################
## need to set to local working directory containing the replication files
setwd("~/Google Drive/gov2001/paper/")
#############################################################


#############################################################
##### Replicating Perkins & Neumayer's (2014) Analyses ###### 
#############################################################
"
In order to replicate the analyses by Perkins & Neumayer (2014) exaclty, we run their graciously provided Stata DO file by calling Stata from R.
We do this not only for the year of data they chose to analyze (2010), but also for 2 other years available (2008 and 2012).
We chose to omit the one other year available (2007), as this was the first time the WHO report on the global tobacco epidemic was published and data collection and analysis methods were subsequently refined.
We then port the results obtained from Stata to R for visualization.
Note that you may have to change the Stata call to fit your version of Stata, and that you need to have both Stata and the Stata Terminal Utility installed.
Also note that executing the Stata DO files is expected to take several minutes.
"

#############################################################
## 2008
system("Stata-MP -b smoke_2008.do", intern = FALSE)
#############################################################

#############################################################
## 2010
system("Stata-MP -b smoke_2010.do", intern = FALSE)
#############################################################

#############################################################
## 2012
system("Stata-MP -b smoke_2012.do", intern = FALSE)
#############################################################


#############################################################
#### Replicating Tables and Extending to Two More Years ##### 
#############################################################

#############################################################
## Table 1
# importing
t1.2008 <- read.table("tmp/2008/table1.txt", sep="\t", header=TRUE, skip=2)
t1.2010 <- read.table("tmp/2010/table1.txt", sep="\t", header=TRUE, skip=2)
t1.2012 <- read.table("tmp/2012/table1.txt", sep="\t", header=TRUE, skip=2)

# resizing
t1.2008 <- t1.2008[2:5, ]
t1.2010 <- t1.2010[2:5, ]
t1.2012 <- t1.2012[2:5, ]
#############################################################

#############################################################
## Table 2
# importing
t2.2008 <- read.table("tmp/2008/table2.txt", sep="\t", header=TRUE, skip=2)
t2.2010 <- read.table("tmp/2010/table2.txt", sep="\t", header=TRUE, skip=2)
t2.2012 <- read.table("tmp/2012/table2.txt", sep="\t", header=TRUE, skip=2)

# resizing
t2.2008 <- t2.2008[2:5, ]
t2.2010 <- t2.2010[2:5, ]
t2.2012 <- t2.2012[2:5, ]
#############################################################

#############################################################
## Table 3
# importing
t3.2008 <- read.table("tmp/2008/table3.txt", sep="\t", header=TRUE, skip=2)
t3.2010 <- read.table("tmp/2010/table3.txt", sep="\t", header=TRUE, skip=2)
t3.2012 <- read.table("tmp/2012/table3.txt", sep="\t", header=TRUE, skip=2)

# resizing
t3.2008 <- t3.2008[2:11, -1]
t3.2010 <- t3.2010[2:11, -1]
t3.2012 <- t3.2012[2:11, -1]
#############################################################

#############################################################
## Table 4
# importing
t4.2008 <- read.table("tmp/2008/table4.txt", sep="\t", header=TRUE, skip=2)
t4.2010 <- read.table("tmp/2010/table4.txt", sep="\t", header=TRUE, skip=2)
t4.2012 <- read.table("tmp/2012/table4.txt", sep="\t", header=TRUE, skip=2)

# resizing
t4.2008 <- t4.2008[2:11, -1]
t4.2010 <- t4.2010[2:11, -1]
t4.2012 <- t4.2012[2:11, -1]
#############################################################


#############################################################
########## Visualizing analyses for all 3 years ############# 
#############################################################
"
Although we provide the code to reproduce Perkins & Neumayer (2014) tables 1-4 for all 3 years, we focus on tables 1 & 3 only going forward.
This is because the results of table 4 are very similar to table 3, and the results of table 2 do not vary much across the 3 years.
"

#############################################################
## 1. figure 1
"Note: the following code is messy and should be made more asthetically pleasing. But it works."

# extracting the relevant variable (pc gdp)
f1.2008 <- t1.2008[1,]
f1.2010 <- t1.2010[1,]
f1.2012 <- t1.2012[1,]

# turning factors -> strings
f1.2008 <- data.frame(lapply(f1.2008, as.character), stringsAsFactors=FALSE)
f1.2010 <- data.frame(lapply(f1.2010, as.character), stringsAsFactors=FALSE)
f1.2012 <- data.frame(lapply(f1.2012, as.character), stringsAsFactors=FALSE)


# rearranging
i <- seq(2, 8, by=2)
j <- seq(3, 9, by=2)

t.f1.2008 <- matrix(NA, nrow=length(i), ncol=2)
for (k in 1:length(i)) {
  t.f1.2008[k,] <- t(as.matrix(f1.2008[, i[[k]]:j[[k]]]))
}

t.f1.2010 <- matrix(NA, nrow=length(i), ncol=2)
for (k in 1:length(i)) {
  t.f1.2010[k,] <- t(as.matrix(f1.2010[, i[[k]]:j[[k]]]))
}

t.f1.2012 <- matrix(NA, nrow=length(i), ncol=2)
for (k in 1:length(i)) {
  t.f1.2012[k,] <- t(as.matrix(f1.2012[, i[[k]]:j[[k]]]))
}


# extracting CI as separate cells
t.f1.2008 <- as.data.frame(t.f1.2008, stringsAsFactors=FALSE)
ci.f1.2008 <- t(as.data.frame(as.numeric(unlist(strsplit(unlist(strsplit((unlist(strsplit(t.f1.2008$V2, " - "))), ")")), "\\(")))))

t.f1.2010 <- as.data.frame(t.f1.2010, stringsAsFactors=FALSE)
ci.f1.2010 <- t(as.data.frame(as.numeric(unlist(strsplit(unlist(strsplit((unlist(strsplit(t.f1.2010$V2, " - "))), ")")), "\\(")))))

t.f1.2012 <- as.data.frame(t.f1.2012, stringsAsFactors=FALSE)
ci.f1.2012 <- t(as.data.frame(as.numeric(unlist(strsplit(unlist(strsplit((unlist(strsplit(t.f1.2012$V2, " - "))), ")")), "\\(")))))


# restructuring them
i <- seq(2, 11, by=3)
j <- seq(3, 12, by=3)

t1.ci.2008 <- matrix(NA, nrow=length(i), ncol=2)
for (k in 1:length(i)) {
  t1.ci.2008[k,] <- t(as.matrix(ci.f1.2008[, i[[k]]:j[[k]]]))
}

t1.ci.2010 <- matrix(NA, nrow=length(i), ncol=2)
for (k in 1:length(i)) {
  t1.ci.2010[k,] <- t(as.matrix(ci.f1.2010[, i[[k]]:j[[k]]]))
}

t1.ci.2012 <- matrix(NA, nrow=length(i), ncol=2)
for (k in 1:length(i)) {
  t1.ci.2012[k,] <- t(as.matrix(ci.f1.2012[, i[[k]]:j[[k]]]))
}


# creating color vectors
color.2008 <- ifelse(t1.ci.2008[,1] > 0, 1, 0)
color.2010 <- ifelse(t1.ci.2010[,1] > 0, 1, 0)
color.2012 <- ifelse(t1.ci.2012[,1] > 0, 1, 0)


# combining y coordinates, point estimates, CIs, and color vector
y <- c(1,2,3,4)

f1.plot.2008 <- as.data.frame(cbind(y, gsub("\\*", "", t.f1.2008[,1]), t1.ci.2008, color.2008), stringsAsFactors=FALSE)
colnames(f1.plot.2008) <- c("y", "p", "low", "hi", "col")

f1.plot.2010 <- as.data.frame(cbind(y, gsub("\\*", "", t.f1.2010[,1]), t1.ci.2010, color.2010), stringsAsFactors=FALSE)
colnames(f1.plot.2010) <- c("y", "p", "low", "hi", "col")

f1.plot.2012 <- as.data.frame(cbind(y, gsub("\\*", "", t.f1.2012[,1]), t1.ci.2012, color.2012), stringsAsFactors=FALSE)
colnames(f1.plot.2012) <- c("y", "p", "low", "hi", "col")


# turning chars -> numeric
f1.plot.2008 <- data.frame(lapply(f1.plot.2008, as.numeric), stringsAsFactors=FALSE)
f1.plot.2010 <- data.frame(lapply(f1.plot.2010, as.numeric), stringsAsFactors=FALSE)
f1.plot.2012 <- data.frame(lapply(f1.plot.2012, as.numeric), stringsAsFactors=FALSE)


# transforming color indicator to actual color
f1.plot.2008$col <- ifelse(f1.plot.2008$col == 1, "red", "black")
f1.plot.2010$col <- ifelse(f1.plot.2010$col == 1, "red", "black")
f1.plot.2012$col <- ifelse(f1.plot.2012$col == 1, "red", "black")

# creating a custom color scale
colors <- colorRampPalette(c("black", "red"))(n = 2)
names(colors) <- levels(f1.plot.2008$col)
colScale <- scale_colour_manual(name = "cols", values = colors)

colRed <- scale_colour_manual(name = "cols", values = "#FF0000")



# plotting
gg1.plot.2008 <-
ggplot(f1.plot.2008, aes(x=p, y=y, colour=col)) + 
  geom_errorbarh(aes(xmin=low, xmax=hi), size = 1, height = .1) +
  geom_point(size = 4) +
  geom_vline(xintercept = 0, linetype="dashed") +  # creating line at 0 as reference
  ggtitle("2008") +
  colScale +
  coord_cartesian(xlim=c(-0.1, .17)) +
  scale_x_continuous(breaks=seq(-0.1, 0.15, 0.05)) +
  theme(plot.background = element_blank(), panel.background = element_blank(), panel.margin = unit(c(15,40,15,15), "points"),
        plot.title = element_text(hjust = 0.5, size = 24, color = "dodgerblue4", family="Helvetica", face = "bold"),
        axis.title = element_blank(), axis.text = element_text(size = 16), legend.position="none")

gg1.plot.2010 <-
ggplot(f1.plot.2010, aes(x=p, y=y, colour=col)) + 
  geom_errorbarh(aes(xmin=low, xmax=hi), size = 1, height = .1) +
  geom_point(size = 4) +
  geom_vline(xintercept = 0, linetype="dashed") +
  ggtitle("2010") +
  colRed +
  coord_cartesian(xlim=c(-0.1, .17)) +
  scale_x_continuous(breaks=seq(-0.1, 0.15, 0.05)) +
  theme(plot.background = element_blank(), panel.background = element_blank(), panel.margin = unit(c(15,40,15,15), "points"),
        plot.title = element_text(hjust = 0.5, size = 24, color = "dodgerblue4", family="Helvetica", face = "bold"),
        axis.title = element_blank(), axis.text = element_text(size = 16), legend.position="none")

gg1.plot.2012 <-
ggplot(f1.plot.2012, aes(x=p, y=y, colour=col)) + 
  geom_errorbarh(aes(xmin=low, xmax=hi), size = 1, height = .1) +
  geom_point(size = 4) +
  geom_vline(xintercept = 0, linetype="dashed") +
  ggtitle("2012") +
  colScale +
  coord_cartesian(xlim=c(-0.1, .17)) +
  scale_x_continuous(breaks=seq(-0.1, 0.15, 0.05)) +
  theme(plot.background = element_blank(), panel.background = element_blank(), panel.margin = unit(c(15,15,15,15), "points"),
        plot.title = element_text(hjust = 0.5, size = 24, color = "dodgerblue4", family="Helvetica", face = "bold"),
        axis.title = element_blank(), axis.text = element_text(size = 16), legend.position="none")

pdf("figure1.pdf", width = 25)
grid.arrange(gg1.plot.2008, gg1.plot.2010, gg1.plot.2012, ncol=3)
dev.off()
#############################################################

#############################################################
## figure 2
"Same note as above: the following code is messy and should be made more asthetically pleasing. But it works."

# extracting the relevant variables (pc gdp; health to gov expend)
f3.gdp.2008 <- t3.2008[2,]
f3.gdp.2010 <- t3.2010[2,]
f3.gdp.2012 <- t3.2012[2,]

f3.hexp.2008 <- t3.2008[4,]
f3.hexp.2010 <- t3.2010[4,]
f3.hexp.2012 <- t3.2012[4,]

# turning factors -> strings
f3.gdp.2008 <- data.frame(lapply(f3.gdp.2008, as.character), stringsAsFactors=FALSE)
f3.gdp.2010 <- data.frame(lapply(f3.gdp.2010, as.character), stringsAsFactors=FALSE)
f3.gdp.2012 <- data.frame(lapply(f3.gdp.2012, as.character), stringsAsFactors=FALSE)

f3.hexp.2008 <- data.frame(lapply(f3.hexp.2008, as.character), stringsAsFactors=FALSE)
f3.hexp.2010 <- data.frame(lapply(f3.hexp.2010, as.character), stringsAsFactors=FALSE)
f3.hexp.2012 <- data.frame(lapply(f3.hexp.2012, as.character), stringsAsFactors=FALSE)

# rearranging
i <- seq(2, 8, by=2)
j <- seq(3, 9, by=2)

t.f3.gdp.2008 <- matrix(NA, nrow=length(i), ncol=2)
for (k in 1:length(i)) {
  t.f3.gdp.2008[k,] <- t(as.matrix(f3.gdp.2008[, i[[k]]:j[[k]]]))
}

t.f3.gdp.2010 <- matrix(NA, nrow=length(i), ncol=2)
for (k in 1:length(i)) {
  t.f3.gdp.2010[k,] <- t(as.matrix(f3.gdp.2010[, i[[k]]:j[[k]]]))
}

t.f3.gdp.2012 <- matrix(NA, nrow=length(i), ncol=2)
for (k in 1:length(i)) {
  t.f3.gdp.2012[k,] <- t(as.matrix(f3.gdp.2012[, i[[k]]:j[[k]]]))
}


t.f3.hexp.2008 <- matrix(NA, nrow=length(i), ncol=2)
for (k in 1:length(i)) {
  t.f3.hexp.2008[k,] <- t(as.matrix(f3.hexp.2008[, i[[k]]:j[[k]]]))
}

t.f3.hexp.2010 <- matrix(NA, nrow=length(i), ncol=2)
for (k in 1:length(i)) {
  t.f3.hexp.2010[k,] <- t(as.matrix(f3.hexp.2010[, i[[k]]:j[[k]]]))
}

t.f3.hexp.2012 <- matrix(NA, nrow=length(i), ncol=2)
for (k in 1:length(i)) {
  t.f3.hexp.2012[k,] <- t(as.matrix(f3.hexp.2012[, i[[k]]:j[[k]]]))
}


# extracting CI as separate cells
t.f3.gdp.2008 <- as.data.frame(t.f3.gdp.2008, stringsAsFactors=FALSE)
ci.f3.gdp.2008 <- t(as.data.frame(as.numeric(unlist(strsplit(unlist(strsplit((unlist(strsplit(t.f3.gdp.2008$V2, " - "))), ")")), "\\(")))))

t.f3.gdp.2010 <- as.data.frame(t.f3.gdp.2010, stringsAsFactors=FALSE)
ci.f3.gdp.2010 <- t(as.data.frame(as.numeric(unlist(strsplit(unlist(strsplit((unlist(strsplit(t.f3.gdp.2010$V2, " - "))), ")")), "\\(")))))

t.f3.gdp.2012 <- as.data.frame(t.f3.gdp.2012, stringsAsFactors=FALSE)
ci.f3.gdp.2012 <- t(as.data.frame(as.numeric(unlist(strsplit(unlist(strsplit((unlist(strsplit(t.f3.gdp.2012$V2, " - "))), ")")), "\\(")))))


t.f3.hexp.2008 <- as.data.frame(t.f3.hexp.2008, stringsAsFactors=FALSE)
ci.f3.hexp.2008 <- t(as.data.frame(as.numeric(unlist(strsplit(unlist(strsplit((unlist(strsplit(t.f3.hexp.2008$V2, " - "))), ")")), "\\(")))))

t.f3.hexp.2010 <- as.data.frame(t.f3.hexp.2010, stringsAsFactors=FALSE)
ci.f3.hexp.2010 <- t(as.data.frame(as.numeric(unlist(strsplit(unlist(strsplit((unlist(strsplit(t.f3.hexp.2010$V2, " - "))), ")")), "\\(")))))

t.f3.hexp.2012 <- as.data.frame(t.f3.hexp.2012, stringsAsFactors=FALSE)
ci.f3.hexp.2012 <- t(as.data.frame(as.numeric(unlist(strsplit(unlist(strsplit((unlist(strsplit(t.f3.hexp.2012$V2, " - "))), ")")), "\\(")))))


# restructuring them
i <- seq(2, 11, by=3)
j <- seq(3, 12, by=3)

t3.ci.gdp.2008 <- matrix(NA, nrow=length(i), ncol=2)
for (k in 1:length(i)) {
  t3.ci.gdp.2008[k,] <- t(as.matrix(ci.f3.gdp.2008[, i[[k]]:j[[k]]]))
}

t3.ci.gdp.2010 <- matrix(NA, nrow=length(i), ncol=2)
for (k in 1:length(i)) {
  t3.ci.gdp.2010[k,] <- t(as.matrix(ci.f3.gdp.2010[, i[[k]]:j[[k]]]))
}

t3.ci.gdp.2012 <- matrix(NA, nrow=length(i), ncol=2)
for (k in 1:length(i)) {
  t3.ci.gdp.2012[k,] <- t(as.matrix(ci.f3.gdp.2012[, i[[k]]:j[[k]]]))
}


t3.ci.hexp.2008 <- matrix(NA, nrow=length(i), ncol=2)
for (k in 1:length(i)) {
  t3.ci.hexp.2008[k,] <- t(as.matrix(ci.f3.hexp.2008[, i[[k]]:j[[k]]]))
}

t3.ci.hexp.2010 <- matrix(NA, nrow=length(i), ncol=2)
for (k in 1:length(i)) {
  t3.ci.hexp.2010[k,] <- t(as.matrix(ci.f3.hexp.2010[, i[[k]]:j[[k]]]))
}

t3.ci.hexp.2012 <- matrix(NA, nrow=length(i), ncol=2)
for (k in 1:length(i)) {
  t3.ci.hexp.2012[k,] <- t(as.matrix(ci.f3.hexp.2012[, i[[k]]:j[[k]]]))
}


# creating color vectors
color.gdp.2008 <- ifelse(t3.ci.gdp.2008[,1] > 0, 1, 0)
color.gdp.2010 <- ifelse(t3.ci.gdp.2010[,1] > 0, 1, 0)
color.gdp.2012 <- ifelse(t3.ci.gdp.2012[,1] > 0, 1, 0)

color.hexp.2008 <- ifelse(t3.ci.hexp.2008[,1] > 0, 1, 0)
color.hexp.2010 <- ifelse(t3.ci.hexp.2010[,1] > 0, 1, 0)
color.hexp.2012 <- ifelse(t3.ci.hexp.2012[,1] > 0, 1, 0)


# combining y coordinates, point estimates, CIs, and color vector
y <- c(1,2,3,4)

f3.plot.gdp.2008 <- as.data.frame(cbind(y, gsub("\\*", "", t.f3.gdp.2008[,1]), t3.ci.gdp.2008, color.gdp.2008), stringsAsFactors=FALSE)
colnames(f3.plot.gdp.2008) <- c("y", "p", "low", "hi", "col")

f3.plot.gdp.2010 <- as.data.frame(cbind(y, gsub("\\*", "", t.f3.gdp.2010[,1]), t3.ci.gdp.2010, color.gdp.2010), stringsAsFactors=FALSE)
colnames(f3.plot.gdp.2010) <- c("y", "p", "low", "hi", "col")

f3.plot.gdp.2012 <- as.data.frame(cbind(y, gsub("\\*", "", t.f3.gdp.2012[,1]), t3.ci.gdp.2012, color.gdp.2012), stringsAsFactors=FALSE)
colnames(f3.plot.gdp.2012) <- c("y", "p", "low", "hi", "col")


f3.plot.hexp.2008 <- as.data.frame(cbind(y, gsub("\\*", "", t.f3.hexp.2008[,1]), t3.ci.hexp.2008, color.hexp.2008), stringsAsFactors=FALSE)
colnames(f3.plot.hexp.2008) <- c("y", "p", "low", "hi", "col")

f3.plot.hexp.2010 <- as.data.frame(cbind(y, gsub("\\*", "", t.f3.hexp.2010[,1]), t3.ci.hexp.2010, color.hexp.2010), stringsAsFactors=FALSE)
colnames(f3.plot.hexp.2010) <- c("y", "p", "low", "hi", "col")

f3.plot.hexp.2012 <- as.data.frame(cbind(y, gsub("\\*", "", t.f3.hexp.2012[,1]), t3.ci.hexp.2012, color.hexp.2012), stringsAsFactors=FALSE)
colnames(f3.plot.hexp.2012) <- c("y", "p", "low", "hi", "col")


# turning chars -> numeric
f3.plot.gdp.2008 <- data.frame(lapply(f3.plot.gdp.2008, as.numeric), stringsAsFactors=FALSE)
f3.plot.gdp.2010 <- data.frame(lapply(f3.plot.gdp.2010, as.numeric), stringsAsFactors=FALSE)
f3.plot.gdp.2012 <- data.frame(lapply(f3.plot.gdp.2012, as.numeric), stringsAsFactors=FALSE)

f3.plot.hexp.2008 <- data.frame(lapply(f3.plot.hexp.2008, as.numeric), stringsAsFactors=FALSE)
f3.plot.hexp.2010 <- data.frame(lapply(f3.plot.hexp.2010, as.numeric), stringsAsFactors=FALSE)
f3.plot.hexp.2012 <- data.frame(lapply(f3.plot.hexp.2012, as.numeric), stringsAsFactors=FALSE)


# transforming color indicator to actual color
f3.plot.gdp.2008$col <- ifelse(f3.plot.gdp.2008$col == 1, "red", "black")
f3.plot.gdp.2010$col <- ifelse(f3.plot.gdp.2010$col == 1, "red", "black")
f3.plot.gdp.2012$col <- ifelse(f3.plot.gdp.2012$col == 1, "red", "black")

f3.plot.hexp.2008$col <- ifelse(f3.plot.hexp.2008$col == 1, "red", "black")
f3.plot.hexp.2010$col <- ifelse(f3.plot.hexp.2010$col == 1, "red", "black")
f3.plot.hexp.2012$col <- ifelse(f3.plot.hexp.2012$col == 1, "red", "black")

# creating a custom color scale
colors <- colorRampPalette(c("black", "red"))(n = 2)
names(colors) <- levels(f3.plot.gdp.2008$col)
colScale <- scale_colour_manual(name = "cols", values = colors)

colRed <- scale_colour_manual(name = "cols", values = "#FF0000")



# plotting
gg3.plot.gdp.2008 <-
  ggplot(f3.plot.gdp.2008, aes(x=p, y=y, colour=col)) + 
  geom_errorbarh(aes(xmin=low, xmax=hi), size = 1, height = .1) +
  geom_point(size = 4) +
  geom_vline(xintercept = 0, linetype="dashed") +  # creating line at 0 as reference
  ggtitle("2008") +
  colRed +
  coord_cartesian(xlim=c(-0.2, 2)) +
  scale_x_continuous(breaks=seq(0, 2, 0.5)) +
  theme(plot.background = element_blank(), panel.background = element_blank(), panel.margin = unit(c(15,40,15,40), "points"),
        plot.title = element_text(hjust = 0.5, size = 24, color = "dodgerblue4", family="Helvetica", face = "bold"),
        axis.title = element_blank(), axis.text = element_text(size = 16), legend.position="none")

gg3.plot.gdp.2010 <-
  ggplot(f3.plot.gdp.2010, aes(x=p, y=y, colour=col)) + 
  geom_errorbarh(aes(xmin=low, xmax=hi), size = 1, height = .1) +
  geom_point(size = 4) +
  geom_vline(xintercept = 0, linetype="dashed") +
  ggtitle("2010") +
  colRed +
  coord_cartesian(xlim=c(-0.2, 2)) +
  scale_x_continuous(breaks=seq(0, 2, 0.5)) +
  theme(plot.background = element_blank(), panel.background = element_blank(), panel.margin = unit(c(15,40,15,40), "points"),
        plot.title = element_text(hjust = 0.5, size = 24, color = "dodgerblue4", family="Helvetica", face = "bold"),
        axis.title = element_blank(), axis.text = element_text(size = 16), legend.position="none")

gg3.plot.gdp.2012 <-
  ggplot(f3.plot.gdp.2012, aes(x=p, y=y, colour=col)) + 
  geom_errorbarh(aes(xmin=low, xmax=hi), size = 1, height = .1) +
  geom_point(size = 4) +
  geom_vline(xintercept = 0, linetype="dashed") +
  ggtitle("2012") +
  colRed +
  coord_cartesian(xlim=c(-0.2, 2)) +
  scale_x_continuous(breaks=seq(0, 2, 0.5)) +
  theme(plot.background = element_blank(), panel.background = element_blank(), panel.margin = unit(c(15,40,15,40), "points"),
        plot.title = element_text(hjust = 0.5, size = 24, color = "dodgerblue4", family="Helvetica", face = "bold"),
        axis.title = element_blank(), axis.text = element_text(size = 16), legend.position="none")


gg3.plot.hexp.2008 <-
  ggplot(f3.plot.hexp.2008, aes(x=p, y=y, colour=col)) + 
  geom_errorbarh(aes(xmin=low, xmax=hi), size = 1, height = .1) +
  geom_point(size = 4) +
  geom_vline(xintercept = 0, linetype="dashed") +  # creating line at 0 as reference
  ggtitle("2008") +
  colScale +
  coord_cartesian(xlim=c(-0.15, 0.3)) +
  scale_x_continuous(breaks=seq(-0.15, 0.15, 0.15)) +
  theme(plot.background = element_blank(), panel.background = element_blank(), panel.margin = unit(c(15,50,15,40), "points"),
        plot.title = element_text(hjust = 0.5, size = 24, color = "dodgerblue4", family="Helvetica", face = "bold"),
        axis.title = element_blank(), axis.text = element_text(size = 16), legend.position="none")

gg3.plot.hexp.2010 <-
  ggplot(f3.plot.hexp.2010, aes(x=p, y=y, colour=col)) + 
  geom_errorbarh(aes(xmin=low, xmax=hi), size = 1, height = .1) +
  geom_point(size = 4) +
  geom_vline(xintercept = 0, linetype="dashed") +
  ggtitle("2010") +
  colRed +
  coord_cartesian(xlim=c(-0.15, 0.3)) +
  scale_x_continuous(breaks=seq(-0.15, 0.15, 0.15)) +
  theme(plot.background = element_blank(), panel.background = element_blank(), panel.margin = unit(c(15,50,15,40), "points"),
        plot.title = element_text(hjust = 0.5, size = 24, color = "dodgerblue4", family="Helvetica", face = "bold"),
        axis.title = element_blank(), axis.text = element_text(size = 16), legend.position="none")

gg3.plot.hexp.2012 <-
  ggplot(f3.plot.hexp.2012, aes(x=p, y=y, colour=col)) + 
  geom_errorbarh(aes(xmin=low, xmax=hi), size = 1, height = .1) +
  geom_point(size = 4) +
  geom_vline(xintercept = 0, linetype="dashed") +
  ggtitle("2012") +
  colScale +
  coord_cartesian(xlim=c(-0.15, 0.3)) +
  scale_x_continuous(breaks=seq(-0.15, 0.15, 0.15)) +
  theme(plot.background = element_blank(), panel.background = element_blank(), panel.margin = unit(c(15,50,15,40), "points"),
        plot.title = element_text(hjust = 0.5, size = 24, color = "dodgerblue4", family="Helvetica", face = "bold"),
        axis.title = element_blank(), axis.text = element_text(size = 16), legend.position="none")

pdf("figure2.pdf", width = 25)
grid.arrange(gg3.plot.gdp.2008, gg3.plot.gdp.2010, gg3.plot.gdp.2012, ncol=3)
dev.off()

pdf("figure3.pdf", width = 25)
grid.arrange(gg3.plot.hexp.2008, gg3.plot.hexp.2010, gg3.plot.hexp.2012, ncol=3)
dev.off()
#############################################################

#############################################################
## 2. full tables for appendix
# t1: exporting to latex
print(xtable(t1.2008, caption = "2008 Results on adoption of smoking bans in restaurants or bars"),
      caption.placement="top",
      size="footnotesize",
      include.rownames=FALSE)

print(xtable(t1.2010, caption = "2010 Results on adoption of smoking bans in restaurants or bars"),
      caption.placement="top",
      size="footnotesize",
      include.rownames=FALSE)

print(xtable(t1.2012, caption = "2012 Results on adoption of smoking bans in restaurants or bars"),
      caption.placement="top",
      size="footnotesize",
      include.rownames=FALSE)

# t3: exporting to latex
# 3. exporting to latex
print(xtable(t3.2008, caption = "2008 Results on smoking ban compliance"),
      caption.placement="top",
      size="footnotesize")

print(xtable(t3.2010, caption = "2010 Results on smoking ban compliance"),
      caption.placement="top",
      size="footnotesize")

print(xtable(t3.2012, caption = "2012 Results on smoking ban compliance"),
      caption.placement="top",
      size="footnotesize")
#############################################################


#############################################################
########## Summary Statistics for all Three Years ########### 
#############################################################

#############################################################
## 1. Readying the data
# loading the three years of data (after they have undergone light pre-processing in Stata)
smoke.2008 <- read.dta("tmp/2008/smoke_2008_mod.dta")
smoke.2010 <- read.dta("tmp/2010/smoke_2010_mod.dta")
smoke.2012 <- read.dta("tmp/2012/smoke_2012_mod.dta")

# treating all missing the same for producing summary stats
smoke.2008[smoke.2008 == "m"] <- NA
smoke.2008$smoke_ban_enforcement <- as.numeric(smoke.2008$smoke_ban_enforcement)

smoke.2012[smoke.2012 == "m"] <- NA
smoke.2012$smoke_ban_enforcement <- as.numeric(smoke.2012$smoke_ban_enforcement)

# Note: the 2010 data do not contain the distinction we get rid of here
#############################################################

#############################################################
## 2. Summarizing the data
st1.nm <- cbind("Smoking ban in rest. or bars, total number", "Smoking ban compliance, median", "GDP p.c., current $", "Smoking prevalence, %", "Health care to total gov. expend., %", "Tobacco production, thousand metric tons")

# 2008
attach(smoke.2008)
sum.2008 <- cbind(smoke_ban_rest_or_bars_who, smoke_ban_enforcement, ln_gdp_pc, cigarette_consumption, health_exp_to_gov_exp, tobacco_production_filled0)
options(scipen=100)
options(digits=2)
s.2008 <- stat.desc(sum.2008)

attach(s.2008)
st1.2008 <- rbind(smoke_ban_rest_or_bars_who[7], smoke_ban_enforcement[8], exp(ln_gdp_pc[8]), cigarette_consumption[8], health_exp_to_gov_exp[8], tobacco_production_filled0[9])

# 2010
attach(smoke.2010)
sum.2010 <- cbind(smoke_ban_rest_or_bars_who, smoke_ban_enforcement, ln_gdp_pc, cigarette_consumption, health_exp_to_gov_exp, tobacco_production_filled0)
options(scipen=100)
options(digits=2)
s.2010 <- stat.desc(sum.2010)

attach(s.2010)
st1.2010 <- rbind(44, smoke_ban_enforcement[8], exp(ln_gdp_pc[8]), cigarette_consumption[8], health_exp_to_gov_exp[8], tobacco_production_filled0[9])

# 2012
attach(smoke.2012)
sum.2012 <- cbind(smoke_ban_rest_or_bars_who, smoke_ban_enforcement, ln_gdp_pc, cigarette_consumption, health_exp_to_gov_exp, tobacco_production_filled0)
options(scipen=100)
options(digits=2)
s.2012 <- stat.desc(sum.2012)

attach(s.2012)
st1.2012 <- rbind(smoke_ban_rest_or_bars_who[7], smoke_ban_enforcement[8], exp(ln_gdp_pc[8]), cigarette_consumption[8], health_exp_to_gov_exp[8], tobacco_production_filled0[9])

# combining
st1.all <- cbind(st1.2008, st1.2010, st1.2012)
colnames(st1.all) <- c("2008", "2010", "2012")
rownames(st1.all) <- st1.nm

# st1: exporting to latex
print(xtable(st1.all, caption = "Descriptive statistics, by year"),
      caption.placement="top",
      include.rownames=TRUE)
#############################################################
